* This file constructs Weibull distributed survival curves from individual reports of survival expectations *
* Constructs figure of subjective v objective annuity rates (Figure 5) *
* Creates inputs for annuitisation model *

* setup *
clear all
set maxvar 30000
set more off

 graph set window fontface "Times New Roman"

	* Get ELSA data *

clear
use "$savedata\wave1-7base.dta"

/********************/
/* Sample selection */
/********************/

gen sample1 = 0
gen sample2 = 0
replace sample1 = 1 if inlist(finstat,1,7,14,25,33) & !(exlo80 < 0 | exlo80 == 100 | exlo80 == .) & !(exlo90 < 0 | exlo90 == 100 | exlo90 == .) & !(exlo80 <= exlo90)
replace sample2 = 1 if inlist(finstat,1,7,14,25,33) & !(exlo80 < 0 | exlo80 == .) & !(exlo90 < 0 | exlo90 == .) & !(exlo80 <= exlo90)

drop if age == 70 // 3 obs that are aged 70: shouldn't be possible
drop if sample1 == 0 & sample2 == 0 // drop those not used in analysis

preserve 
keep idauniq wave sample1 sample2
save "$temp\sample_indicators.dta", replace
restore

/**********************************************************************************************/
/* Calculate two parameters of Weibull distribution for each individual with 2 answers 		  */
/**********************************************************************************************/

/* Starting values of k and lambda for sample */
replace exlo80 = exlo80/100
replace exlo90 = exlo90/100
* Replace 0 and 100 answers with 0.1% and 99.9% (for purposes of creating intial values only) *
replace exlo80 = 0.001 if exlo80 == 0
replace exlo90 = 0.001 if exlo90 == 0
replace exlo80 = 0.999 if exlo80 == 1
replace exlo90 = 0.999 if exlo90 == 1
gen     t1 = 75-age if age<66
replace t1 = 80-age if age>65
gen     t2 = 85-age
gen k = ln(ln(exlo80)/ln(exlo90))/(ln(t1)-ln(t2))
gen lambda = (1/t1)*((-ln(exlo80))^(1/k))
gen invlambda = 1/lambda
*replace invlambda=. if invlambda>5000

/**********************************************************************************************/
/* Now, impose third point and then fit Weibull according to least squares					  */
/**********************************************************************************************/

* We are going to merge in the individual's probability of reaching age 110, according to life tables *
* We treat this as a third "answer" that the individual gives. *

gen t3 = 110 - age

// Method with sex-age-cohort specific probabilities of surviving to age 100

* Merge in scaled life table data given sex, dobyear, age
drop prob*
merge m:1 dobyear sex age using "$temp\prob_survive_scaled.dta"
assert _merge != 1
keep if _merge == 3
drop _merge

* calculate probability of surviving to 100 given current age, cohort and sex
gen exlo3 = prob110
* recode exlo3 to 0 if exlo2 is 0 *
replace exlo3 = 0 if (exlo90 == 0 | exlo80 == 0)

rename exlo80 exlo1
rename exlo90 exlo2

* keep the Weibull parameters that were calculated earlier from the 2 answers only *
* we use these as initial values in the least squares procedure *

keep exlo? t? idauniq k lambda invlambda wave sex sample1 
reshape long t exlo, i(idauniq wave) j(age) // data set is now at answer level
gen minuslexlo = -ln(exlo)

* we then calculate new Weibull paramters for each observation of each individual which minimise squared distance to 3 "answers" *
* we do this wave-by-wave beacause individuals are interviewed multiple times *

* NOTE: this takes a long time to run

forv wave = 3(1)7 {

preserve
keep if wave == `wave'

drop if k == .		
drop if lambda == . 

egen id = group(idauniq)
sum id
local firstid = r(min)
local lastid = r(max)
gen newk = .
gen newlambda = .

* for each individual, perform the non-linear least squares procedure to get new Weibull parameters *

forvalues id  = `firstid' / `lastid' {
	sum k if id == `id'
	local initk = r(mean)
	sum lambda if id == `id'
	local initlambda = r(mean)
	nl ( exlo =  exp(-( {lambda}*t)^{k})) if id==`id', initial(lambda `initlambda' k `initk')
	mat def coeffs = e(b)
	svmat coeffs
	replace newlambda = coeffs1[1] if id == `id'
	replace newk = coeffs2[1] if id == `id'
	drop coeffs?
}

gen newinvlambda = 1/newlambda
save "$temp\parameters_wave`wave'.dta", replace

restore
}

/**************************************************/
/* Append all files with parameters and save	  */
/**************************************************/

clear 
use "$temp\parameters_wave3.dta"
forv w = 4(1)7 {
append using "$temp\parameters_wave`w'.dta"
}

duplicates drop idauniq wave, force
keep idauniq wave sex newk newinvlambda 
save "$temp\Weibull_parameters.dta", replace

/**************************************************************************************************/
/*   Use SCALED life tables data to calculate fair annuity rates for each sex/age/birth year	  */
/**************************************************************************************************/

* this file is made in the "Fitted ELSA survival curves" do file 
clear
use "$temp\prob_survive_scaled.dta"


*** SET REAL INTEREST RATE ***

gen r = 0.0

*** Generate objective annuity rate ***

gen real_cost = 0
forv t = 0(1)60 {
gen prob_alive_in_`t' = 0
	forv age = 50(1)110 {
	replace prob_alive_in_`t' = prob`age' if `age' == age + `t'
	}
replace real_cost = real_cost + prob_alive_in_`t'/(1+r)^`t' 
}

gen annuity_rate = 1/real_cost

keep age dobyear sex annuity_rate

save "$temp\objective_annuity_rates_scaled.dta", replace

/**************************************************************************************/
/*   Merge Weibull parameters and objective annuity rates							  */
/**************************************************************************************/

clear
use "$temp\Weibull_parameters.dta"

merge 1:1 idauniq wave using "$temp\sample_indicators.dta"
assert _m != 1
keep if _m == 3 
drop _m 

merge 1:1 idauniq wave using "$savedata\wave1-7base.dta"
assert _merge != 1
keep if _m == 3
drop _m

merge m:1 age sex dobyear using "$temp\objective_annuity_rates_scaled.dta"
assert _m != 1
keep if _m == 3
drop _m

keep idauniq dobyear wave sex age newk newinvlambda annuity_rate exlo80 exlo90 sample1 sample2
rename newk k
rename newinvlambda lambda

sort wave idauniq 
gen id = _n
save "$temp\model_parameter_inputs.dta", replace

/**************************************************************************************/
/*   Generate subjective survival curve annuity rate, and subjective and objective LE */
/**************************************************************************************/

clear
use "$temp\model_parameter_inputs.dta"

gen r = 0.00

* subjective annuity rate *
gen subj_cost = 0
forv t = 0(1)60 {
gen subj_alive_in_`t' = 0
	forv age = 50(1)110 {
	replace subj_alive_in_`t' = exp(-((`t'/lambda)^(k))) if `t' == `age' - age
	}
replace subj_cost = subj_cost + subj_alive_in_`t'/(1+r)^`t' 
}

gen subj_ann_rate = 1/subj_cost

preserve
keep idauniq wave subj_ann_rate
save "$temp\subj_ann_rates.dta", replace
restore

*******************************************************
***
***		Fact on subjective and objective LE in sample
***
*******************************************************
* life expectancy *
merge m:1 age sex dobyear using "$temp\prob_survive_scaled.dta"
assert _m != 1
drop if _m == 2
drop _m 

* merge in weight *
merge 1:1 idauniq wave using "$savedata\wave1-7base.dta", keepusing(wgt)
assert _merge != 1
keep if _m == 3
drop _m

gen LE = 0
gen SLE = 0 
forv age = 50(1)110 {
    replace LE = LE + prob`age' if `age' > age  
}
forv t = 1(1)60 {
    replace SLE = SLE + subj_alive_in_`t'
}

gen diff = LE - SLE 
gen ratio = 1-  SLE / LE 

preserve 
collapse (mean) diff ratio [pw = wgt], by(sex)
bys sex: su diff  
bys sex : su ratio 
restore 

*******************************************************
***
***		Figure 5
***
*******************************************************

keep age wave sex annuity_rate subj_ann_rate sample1 
gen likes_ann = annuity_rate > subj_ann_rate
tab likes_ann

gen var1 = .
gen var2 = .
replace var1 = 0.02 if _n ==1
replace var2 = 0.02 if _n ==1
replace var1 = 0.06 if _n ==2
replace var2 = 0.06 if _n ==2
keep if subj < 0.2
twoway (scatter annuity_rate subj_ann_rate, mcolor(red) msize(vsmall) msymbol(circle)) (line var1 var2, lpattern(dash) lcolor(black)), ///
graphregion(color(white))  xscale(range(0.02,0.2)) xlabel(0.02[0.04]0.2)  yscale(range(0.02,0.06)) ylabel(0.02[0.01]0.06) legend(off) xtitle("Subjective annuity rate") ytitle("Objective annuity rate") aspect(0.5) ysize(3)
graph export "$output\Fig5.eps", replace


//***********************************************
//
//	Rest of the files prepares inputs to model
//
//***********************************************


/**************************************************************************************/
/*  		 Merge in wealth and state pension income								  */
/**************************************************************************************/

clear
use "$temp\model_parameter_inputs.dta"

* only have pension info up to wave 5 *
keep if inrange(wave,3,5)

* merge in pension wealth variables *
gen pension_wlth = . 
merge m:1 idauniq using "$penv3", keepusing(pripenw_spa)
drop if _m == 2
replace pension_wlth = pripenw_spa if wave == 3
drop _m
merge m:1 idauniq using "$penv4", keepusing(pripenw_spa) update replace 
drop if _m == 2
replace pension_wlth = pripenw_spa if wave == 4
drop _m
merge m:1 idauniq using "$penv5", keepusing(pripenw_spa) update replace
drop if _m == 2
replace pension_wlth = pripenw_spa if wave == 5
drop _m
drop pripenw_spa

* merge in financial wealth and state pension income variables *
gen fin_wlth = .
gen sp_inc = .
gen house = .
merge m:1 idauniq using "$findv3", keepusing(grossfw_bu_s spen_r_i hsval_hh_i)
drop if _m == 2
replace fin_wlth = grossfw_bu_s if wave == 3
replace sp_inc = spen_r_i if wave == 3
replace house = hsval_hh_i if wave  == 3
drop _m
merge m:1 idauniq using "$findv4", keepusing(grossfw_bu_s spen_r_i hsval_hh_i) update replace 
drop if _m == 2
replace fin_wlth = grossfw_bu_s if wave == 4
replace sp_inc = spen_r_i if wave == 4
replace house = hsval_hh_i if wave  == 4
drop _m
merge m:1 idauniq using "$findv5", keepusing(grossfw_bu_s spen_r_i hsval_hh_i) update replace
drop if _m == 2
replace fin_wlth = grossfw_bu_s if wave == 5
replace sp_inc = spen_r_i if wave == 5
replace house = hsval_hh_i if wave  == 5
drop _m
drop grossfw_bu_s
drop spen_r_i 

* merge in variable on whether deferred state pension from core data *
gen def_sp = .
merge m:1 idauniq using "$core4", keepusing(wpspd1b)
drop if _m == 2
replace def_sp = wpspd1b if wave == 4
drop _m
merge m:1 idauniq using "$core5", keepusing(wpspd1b) update replace 
drop if _m == 2
replace def_sp = wpspd1b if wave == 5
drop _m
drop wpspd1b def_sp

* merge in dob for women and calculate over/under SPA *
merge m:1 idauniq using "$dob"
* Keep only matched obsevations *
keep if _merge == 3
drop _merge
* calculate if over or under SPA *
gen d_birth = day(dob)
gen m_birth = month(dob)
gen y_birth = year(dob)
label var d_birth "Day of birth"
label var m_birth "Month of birth"
label var y_birth "Year of birth"
run "$dofiles\05.1 calc state pension age.do"

* merge in interview date *
merge m:1 idauniq using "$intdat"
assert _merge !=1
keep if _merge == 3 
drop _merge
* Put interview date into long form *
rename intdthw1 intdtiw1
gen intdtiw =.
label variable intdtiw "Interview Date"
	forvalues t = 1(1)8 {
	replace intdtiw = intdtiw`t' if wave == `t'
	drop intdtiw`t'
	format intdtiw %d
	}
* save interview dates for later use *
preserve
keep idauniq wave intdtiw
save "$temp\int_dates.dta", replace
restore

gen over_SPA = .
label var over_SPA "Over SPA at date of interview"
replace over_SPA = 1 if spad <= intdtiw & spad !=. & intdtiw !=.
replace over_SPA = 0 if spad > intdtiw & spad !=. & intdtiw !=.
drop spad* *_birth

* merge in whether have partner *
gen partner = . 
merge m:1 idauniq using "$dv3", keepusing(idauniq_p)
drop if _m == 2
replace partner = idauniq_p if wave == 3
drop _m
merge m:1 idauniq using "$dv4", keepusing(idauniq_p) update replace 
drop if _m == 2
replace partner = idauniq_p if wave == 4
drop _m
merge m:1 idauniq using "$dv5", keepusing(idauniq_p) update replace
drop if _m == 2
replace partner = idauniq_p if wave == 4
drop _m
drop idauniq_p

gen has_partner = idauniq != .

* create total wealth variable (to be annuitised)
gen assets = fin_wlth + pension_wlth
replace assets = assets/2 if has_partner

* make year of interview var *
gen y_int = year(intdtiw)

* put in pension credit guarantee level as minimum - levels from IFS tax and benefits survey*
gen pen_cred = .
replace pen_cred = 114.05 if y_int == 2006
replace pen_cred = 119.05 if y_int == 2007
replace pen_cred = 124.05 if y_int == 2008
replace pen_cred = 130.00 if y_int == 2009
replace pen_cred = 132.60 if y_int == 2010
replace pen_cred = 137.35 if y_int == 2011
order pen_cred, last

* make state pension and pension credit income an annual amount *
replace sp_inc = sp_inc *52
replace pen_cred = pen_cred*52

* generate housing consumption *
gen house_inc = house*0.04
replace house_inc = 0 if house == .

* Select sample *
keep if over_SPA
gen says_50 = exlo80 == 50 | exlo90 == 50
gen says_0 = exlo80 == 0 | exlo90 == 0
gen says_100 = exlo80 == 100 | exlo90 == 100
drop if sp_inc <=0
keep if assets > 0 & assets != .
drop over_SPA intdtiw fin_wlth pension_wlth y_int has_partner partner house hsval_hh_i exlo80 exlo90 
order idauniq, last

* 2 people aged 59 - impossible *
drop if age < 60

save "$temp\model_inputs.dta", replace
drop wave sex sample1 sample2
outsheet using "$savedata\model_inputs_subj.csv" , comma nonames replace


/**************************************************************************************/
/*  		 Set up scaled life table probs for 'accurate' expectations				  */
/**************************************************************************************/

/*************************************************************************/
/* Step 1: Make hazards													 */
/*************************************************************************/

clear
use "$temp\prob_survive_scaled.dta"

* generate vars for prob of death at specific age *

forv a = 50(1)109 {
	local b = `a' + 1
	gen hazard_`a' = 1- prob`b'/prob`a'
	replace hazard_`a' = . if age < 50 | prob110 == .
	}
gen hazard_110 = 1
replace hazard_110 = . if age < 50 | prob110 == .

keep age dobyear sex hazard* prob*

save "$temp\hazards_by_age.dta", replace


/*************************************************************************/
/* Step 2: Merge in with file on individuals' pension wealth etc	  	 */
/*************************************************************************/

clear
use "$temp\model_inputs.dta"

* merge in death probs *
merge m:1 age sex dobyear using "$temp\hazards_by_age.dta"
keep if _m == 3
drop _m
drop hazard_5* prob5*

* change from hazard and survival at particular age to hazard and survival in years time *
gen haz_fill = 1
* create hazards *
forv i = 0(1)51 {
	gen haz_`i' = .
	}

forv a  = 60(1)110 {
	forv t = 0(1)51 {
	replace haz_`t' = hazard_`a' if `a' == age + `t'
	replace haz_`t' = 1 if age + `t' > 110
	}
}

* create survival probs *

forv i = 0(1)51 {
	gen surv_`i' = .
	}
forv a  = 60(1)110 {
	forv t = 0(1)51 {
	local age = `a' - `t'
	if `a' - `t' >= 60 {
		replace surv_`t' = prob`a'/prob`age' if age == `age'
		}
	}
}

drop hazard* prob* surv* 
order house_inc pen_cred, last

drop says_50

* keep only the first observation of each person *
bys idauniq (wave) : gen obs = _n 
keep if obs == 1
drop obs

sort idauniq wave
replace id = _n

* save IDs *
preserve
keep id idauniq wave
save "$savedata\ids_firstonly.dta", replace
restore

* save version with all *
sort idauniq wave
drop wave sex dobyear idauniq
export delimited using "$fortran\annInputs.txt" , delim(tab) novarnames replace
save "$savedata\fortran_inputs.dta", replace
